---
title: "cbps_arrest"
author: "Garrett Baker (garrett.baker@duke.edu)"
knit: (function(inputFile, encoding) {
  rmarkdown::render(inputFile, encoding = encoding, output_dir = "filepath") })
date: "`r Sys.Date()`"
output: html_document
---

```{r}
library(haven)
library(tidyverse)
library(cobalt)
library(MatchIt)
library(WeightIt)
library(MatchThem)
library(CBPS)
library(texreg)
library(estimatr)
library(mice)
library(logistf)
```



```{r}
var_order <- c("fam_genprobs_indic", 
               "ptebdo1", 
               "pceduc_stand_2", 
               "ptceff1", 
               "ses_nc_w1_2", 
               "ses_nc_w1_3", 
               "fam_drugalc_indic", 
               "sp_ethn_aw_3", 
               "fam_depression_indic", 
               "fam_medical_indic",
               "ses_nc_w1_1", 
               "parent_legal_stand",
               "aptitude_equalized",
               "pceduc_stand_1",
               "pcimmigrant_w1_1",
               "sp_ethn_aw_1",
               "pcsingle_stand",
               "pceduc_stand_0",
               "cohort_numeric_9",
               "pcpubassist_stand",
               "cbc_total_equalized",
               "pcimmigrant_w1_3",
               "sp_male",
               "sp_ethn_aw_2",
               "sp_ethn_aw_4",
               "pcimmigrant_w1_2",
               "w4state_il",
               "prop.score"
               )

var_label <- c("Family General Problems", 
               "Neighborhood Disorder", 
               "PC Graduated College", 
               "Neighborhood Collective Efficacy", 
               "Neighborhood SES Medium", 
               "Neighborhood SES High", 
               "Family Substance Use Issues", 
               "Hispanic", 
               "Family Depression", 
               "Family Health Issues",
               "Neighborhood SES Low", 
               "Parent CJ Contact",
               "Child Aptitude",
               "PC Graduated High School",
               "PC 1st Gen Immigrant",
               "White",
               "PC Single",
               "PC High School Dropout",
               "Cohort 9",
               "PC Public Assistance Receipt",
               "Child Behavior (CBCL)",
               "PC 3rd Gen Immigrant",
               "Gender (Male)",
               "Black",
               "Other Race",
               "PC 2nd Gen Immigrant",
               "Move IL W4",
               "Propensity Score"
               )


love_names = var_label
names(love_names) = var_order

```


## With mi 

```{r}
d <- read_dta("filepath/arrest_educ_cleandata.dta") %>%
  filter(!is.na(arrestedbefore_19),
         !is.na(w4w5_college4yr),
         !is.na(w4state_il),
         cohort_numeric == 0 | cohort_numeric == 9) %>% 
  mutate_at(vars(cohort_numeric, sp_ethn_aw, sp_male, pceduc_stand, pcpubassist_stand, pcimmigrant_w1, pcsingle_stand, parent_legal_stand, fam_depression_indic, fam_medical_indic, fam_drugalc_indic, fam_genprobs_indic, ses_nc_w1, w4state_il),
            ~factor(.))

d2 <- d %>% 
  dplyr::select(w4w5_college4yr, arrestedbefore_19, cohort_numeric, sp_ethn_aw, sp_male, cbc_total_equalized, aptitude_equalized, pceduc_stand, pcpubassist_stand, pcimmigrant_w1, pcsingle_stand, parent_legal_stand, fam_depression_indic, fam_medical_indic, fam_drugalc_indic, fam_genprobs_indic, ptceff1, ptebdo1, ses_nc_w1, w4state_il, w4w5_wgt_trim)
```


```{r, include = FALSE}
var_list <- c("w4w5_college4yr", "arrestedbefore_19", "cohort_numeric", "sp_ethn_aw", "sp_male", "cbc_total_equalized", "aptitude_equalized", "pceduc_stand", "pcpubassist_stand", "pcimmigrant_w1", "pcsingle_stand", "parent_legal_stand", "fam_depression_indic", "fam_medical_indic", "fam_drugalc_indic", "fam_genprobs_indic", "ptceff1", "ptebdo1", "ses_nc_w1", "w4state_il")

tempData_pool <- mice(d2, m = 10, maxit = 20, seed = 100, include = var_list)
```


```{r}
weighted_pool_data <- weightthem(arrestedbefore_19 ~ cohort_numeric + sp_ethn_aw + sp_male + cbc_total_equalized + aptitude_equalized + pceduc_stand + pcpubassist_stand + pcimmigrant_w1 + pcsingle_stand + parent_legal_stand + fam_depression_indic + fam_medical_indic + fam_drugalc_indic + fam_genprobs_indic + ptceff1 + ptebdo1 + ses_nc_w1 + w4state_il, 
                  data = tempData_pool, 
                  method = "cbps", 
                  estimand = "ATT") 
```

```{r}
love.plot(weighted_pool_data,
          binary = "std",
          stats = c("m"#,"ks"
                    ),
          sd.denom = "treat",
          var.order = "unadjusted",
          var.names = love_names,
          abs = TRUE,
          colors = c("indianred3", "skyblue3"),
          shapes = c("triangle filled", "circle"),
          title = "Covariate Balance Before and After Adjustment") + 
  geom_vline(xintercept = 0.1, linetype = "dashed", color = "black") +
  scale_x_continuous(breaks=seq(0.0, 0.8, 0.1)) +
  coord_cartesian(xlim = c(0, 0.84)) +
  labs(x = "Absolute Standardized Mean Differences") +
  theme(legend.position = c(.8, .5),
        legend.box.background = element_rect(), 
        legend.box.margin = margin(1, 1, 1, 1))


love.plot(weighted_pool_data,
          binary = "std",
          stats = c("m","ks"
                    ),
          sd.denom = "treat",
          var.order = "unadjusted",
          var.names = love_names,
          abs = TRUE,
          colors = c("indianred3", "skyblue3"))
```



### Models

#### LPM unweighted (Table 1)

```{r} 
# Model
weighted_pool_model <- with(weighted_pool_data,
                        estimatr::lm_robust(w4w5_college4yr ~ arrestedbefore_19 + cohort_numeric + sp_ethn_aw + sp_male + cbc_total_equalized + aptitude_equalized + pcpubassist_stand + pceduc_stand + relevel(pcimmigrant_w1, ref = "3") + pcsingle_stand + parent_legal_stand + fam_depression_indic + fam_medical_indic + fam_drugalc_indic + fam_genprobs_indic + ptceff1 + ptebdo1 + ses_nc_w1 + w4state_il))

weighted_pool_model |> pool() |> summary(conf.int = TRUE)
``` 


```{r}
weighted_pool_results <- pool(weighted_pool_model)

screenreg(list(weighted_pool_results),
          digits = 2)
```





#### LPM weighted (Appendix Table A2)


```{r}
weighted_pool_data_w <- weightthem(arrestedbefore_19 ~ cohort_numeric + sp_ethn_aw + sp_male + cbc_total_equalized + aptitude_equalized + pcpubassist_stand + pceduc_stand + pcimmigrant_w1 + pcsingle_stand + parent_legal_stand + fam_depression_indic + fam_medical_indic + fam_drugalc_indic + fam_genprobs_indic + ptceff1 + ptebdo1 + ses_nc_w1 + w4state_il, 
                  data = tempData_pool, 
                  method = "cbps", 
                  estimand = "ATT",
                  s.weights = "w4w5_wgt_trim") 
```

```{r}
love.plot(weighted_pool_data_w,
          binary = "std",
          stats = c("m"#,"ks"
                    ),
          sd.denom = "treat",
          var.order = "adjusted",
          abs = TRUE,
          title = "Covariate Balance Before and After Adjustment")
```



```{r} 
# Model
weighted_pool_model_w <- with(weighted_pool_data_w,
                        WeightIt::lm_weightit(w4w5_college4yr ~ arrestedbefore_19 + cohort_numeric + sp_ethn_aw + sp_male + cbc_total_equalized + aptitude_equalized + pcpubassist_stand + pceduc_stand + relevel(pcimmigrant_w1, ref = "3") + pcsingle_stand + parent_legal_stand + fam_depression_indic + fam_medical_indic + fam_drugalc_indic + fam_genprobs_indic + ptceff1 + ptebdo1 + ses_nc_w1 + w4state_il))
``` 


```{r}
weighted_pool_results_w <- pool(weighted_pool_model_w)

screenreg(list(weighted_pool_results_w),
          digits = 2)
```



#### Logit unweighted (Appendix Table A3)

```{r} 
# Model
weighted_pool_model2 <- with(weighted_pool_data,
                            glm_weightit(w4w5_college4yr ~ arrestedbefore_19 + cohort_numeric + sp_ethn_aw + sp_male + cbc_total_equalized + aptitude_equalized + pcpubassist_stand + pceduc_stand + relevel(pcimmigrant_w1, ref = "3") + pcsingle_stand + parent_legal_stand + fam_depression_indic + fam_medical_indic + fam_drugalc_indic + fam_genprobs_indic + ptceff1 + ptebdo1 + ses_nc_w1 + w4state_il,
                            family = "binomial"))

weighted_pool_results2 <- pool(weighted_pool_model2)
``` 


```{r}
summary(weighted_pool_results2, conf.int = TRUE)

screenreg(list(weighted_pool_results2),
          digits = 2)
```









